
** Spatial approach, 1: Grid of product space ** 

	**********------------------------------**********
	* Contents: file creates the product space as a grid of cells defined by combinations of a_bin (capacity) and s_bin (standardized energy use); eei_bin is constructed as well
	* Cell width is 3 kWh (s_bin) and 0.5 kg (a_bin)
	* Input is the raw product-level data 

* ---------------------------------------------------------------
* Session setup: close any log left open, reset Stata, start a
* fresh log in the project directory.
* ---------------------------------------------------------------
capture log close
clear all
set more off

cd "R:\WSV2\TBu_AKe\Spatial_NEW"

log using spatial_1_raw_s, replace
cd // no argument: echoes the working directory into the log

* ---------------------------------------------------------------
* Input: product-level data
* ---------------------------------------------------------------
use "R:\WSV2\TBu_AKe\Product Ban\Stata\Bunching_Empirics\WM_V10_precollapse_binbasis.dta"

generate kwh = ae // annual energy consumption

* Regulation parameters
scalar sigma = 0.59*47 // slope of the attribute-based standard
scalar kappa = 0.59*51.7 // intercept of the standard
scalar D = 3 // increment for cell height

* Standardized energy consumption s = kwh - sigma*capacity - kappa.
* Computed in two steps; s <= 0 means the product is compliant.
generate s = kwh - `=sigma'*capacity
replace s = s - `=kappa'
summarize s, detail // author's note: range is about -253 to 341

* ---------------------------------------------------------------
* Basic descriptives
* ---------------------------------------------------------------
summarize kwh capacity, detail
* Trim the extreme outlier (author's note: DAEWOO model), cap at 600.
* Stata treats missing as larger than any number, so rows with
* missing kwh are removed by this condition as well.
drop if kwh > 600
bysort year: summarize kwh capacity, detail
tabulate capacity // size in kg

summarize eei, detail
tabulate eei_bin // increments of 0.5
* Keep the 0.5-step EEI bin under a new name; eei_bin is rebuilt
* further down as a 1-unit bin.
rename eei_bin eei_bin2
label variable eei_bin2 "EEI in 0.5 bins"


* ---------------------------------------------------------------
* Bin construction parameters: step width, lower and upper bound
* ---------------------------------------------------------------
	* a (capacity)
scalar int_a = 0.5
scalar low_a = 1.25
scalar high_a = 16.25
	* eei
scalar int_e = 1
scalar low_e = 13
scalar high_e = 143
	* s (rotated to flatten grid; 0 is the compliance line)
scalar int_s = 3
scalar low_s = -255
scalar high_s = 342


***************************************
** step 1: cut cells
***************************************
* Builds the three bin variables (a_bin, eei_bin, s_bin) from the
* bounds and widths set in the bin-construction section
* (low_x, int_x, high_x) and stores the observed range and the number
* of steps of each bin variable in scalars (min_x, max_x, n_x).

	* 1a: capacity
egen a_bin = cut(capacity), at(`=low_a'(`=int_a')`=high_a')
	* egen cut labels a bin by its lower bound; shift by half a bin
	* width so that each bin is named by its midpoint
	* (e.g. the bin starting at 4.25 becomes 4.5).
replace a_bin = a_bin + scalar(int_a)/2

summ a_bin, detail
codebook a_bin
	* author's note: 18 cells of width 0.5

	* 1b: kwh via eei
	    * maximum e in cell is determined by a and eei.
	* Fix: the cut points previously used the capacity bounds
	* (low_a, high_a); they now use the EEI bounds. The replace below
	* still overwrites every value with the floor of the 0.5-step bin,
	* so the resulting eei_bin is unchanged.
egen eei_bin = cut(eei), at(`=low_e'(`=int_e')`=high_e')
replace eei_bin = floor(eei_bin2)
codebook eei_bin // author's note: 84 unique values

label variable eei_bin "EEI in 1-unit bins" // 84
label variable eei_bin2 "EEI in 0.5-unit bins" // 133

	* 1c: kwh in standardized version
	* bins of width int_s kwh; the bin label is the lower bound
egen s_bin = cut(s), at(`=low_s'(`=int_s')`=high_s')

sum s if s_bin == -3 // bin range is -3 to 0
sum s if s_bin == 0 // 0 to 3

tab s_bin if eei_bin == 58 // -3 and - 6

tab s_bin if eei_bin == 57 // -9 -6 and -3
tab s_bin if eei_bin == 59 // > 0. ok.

*** store cell boundaries in scalars ***
* r() results are copied directly (no macro expansion) to keep full
* precision; scalar() guards against a variable of the same name.
sum a_bin, meanonly
scalar max_a = r(max)
scalar min_a = r(min)
	** number of steps
scalar n_a = 1 + (scalar(max_a) - scalar(min_a))/scalar(int_a)

*** e_bin based on eei (trapezoid cells)
sum eei_bin, meanonly
scalar max_eei = r(max)
scalar min_eei = r(min)
scalar int_eei = scalar(int_e)
	** number of steps
scalar n_e = 1 + (scalar(max_eei) - scalar(min_eei))/scalar(int_eei)

*** s_bin (rectangular: compliance line at zero, cells of width = 3 kwh)
* int_s is already set in the bin-construction section; it is not
* redefined here so that the width has a single source.
sum s_bin, meanonly
scalar max_s = r(max)
scalar min_s = r(min)
	** number of steps
scalar n_s = 1 + (scalar(max_s) - scalar(min_s))/scalar(int_s)
	*** save before proceeding
*cd "C:\Users\hy65byfe\Desktop\smerge_0712"
*save spatial_0_raw_s, replace

********************************************
*** expand to full grid (balanced panel) ***

* Capacity dimension: make sure every a_bin value between the smallest
* and the largest observed bin appears at least once.
* Method: map a_bin to an integer index ts, number the rows within each
* ts (panel_a), declare (panel_a, ts) as panel data and let tsfill
* insert the missing ts values. Then keep one placeholder row per
* previously empty bin and translate ts back to a_bin.

gen ts = . // integer index of the capacity bin
gen added_a = 0 // 0 = original row; set to 1 for rows created by tsfill

	* index: ts = 2*a_bin - 5 (ex: 2*3 - 5 = 1 for a_bin == 3)
replace ts = 2*a_bin - 5 if !missing(a_bin)
replace ts = 1 if missing(ts) // rows without a_bin: tsset needs a non-missing index

by ts, sort: gen panel_a = _n // running row number within each bin
tsset panel_a ts
tsfill // inserts the ts values missing inside each panel_a series
tab ts // author's note: no gaps, range 1-25

	* tsfill leaves added_a missing on the rows it creates
replace added_a = 1 if missing(added_a)

	* Fix: tsfill works panel by panel, so it also creates rows in bins
	* that already contain products. Order original rows first within
	* each ts so that a placeholder only gets panel_a == 1 (and is
	* kept) when the bin holds no original row. Without the explicit
	* ordering the tie order is arbitrary and a placeholder could
	* survive inside a populated bin.
bysort ts (added_a): replace panel_a = _n
drop if panel_a > 1 & added_a == 1

	* translate the index back: a_bin = 0.5*ts + 2.5 (inverse of the map above)
replace a_bin = 0.5*ts + 2.5 if added_a == 1

tab a_bin

drop ts // auxiliary index no longer needed


***** repeat for s_bin *****

* Standardized-energy dimension: same procedure as for a_bin. Every
* s_bin value between the smallest and the largest observed bin gets at
* least one row; rows created here are tagged with added_s == 1.

gen ts = . // integer index of the s bin
gen added_s = 0 // 0 = row existed before this step; set to 1 for rows created by tsfill

	* index: ts = s_bin / bin width (ex: s_bin == 3 gives ts == 1,
	* negative bins give negative indices)
replace ts = s_bin/scalar(int_s) if !missing(s_bin)
replace ts = 1 if missing(ts) // rows without s_bin: tsset needs a non-missing index

by ts, sort: gen panel_s = _n // running row number within each bin
tsset panel_s ts
tsfill // inserts the ts values missing inside each panel_s series
tab ts // author's note: no gaps

	* tsfill leaves added_s missing on the rows it creates
replace added_s = 1 if missing(added_s)

	* Fix: order pre-existing rows first within each ts, so that a
	* placeholder is only kept (panel_s == 1) when the bin had no row
	* before. With an arbitrary tie order a placeholder could survive
	* inside a populated bin.
bysort ts (added_s): replace panel_s = _n
drop if panel_s > 1 & added_s == 1

	* translate the index back: s_bin = ts * bin width
replace s_bin = ts*scalar(int_s) if added_s == 1

tab s_bin

drop ts // auxiliary index no longer needed


	*** e in kwh/year, assignment via eei ***

* EEI dimension: same procedure as for a_bin and s_bin. Every 1-unit
* eei_bin value in the covered range gets at least one row; rows
* created here are tagged with added_eei == 1.

gen ts = . // integer index of the EEI bin
gen added_eei = 0 // 0 = row existed before this step; set to 1 for rows created by tsfill

	* Fix: index and back-translation must be inverse to each other.
	* The former index 2*eei_bin - 25 belongs to 0.5-unit bins, while
	* the back-translation ts + 12 (kept below) assumes 1-unit bins;
	* mixing them gave added rows duplicate or out-of-range eei_bin
	* labels. With low_e == 13: ts = eei_bin - 12, eei_bin = ts + 12.
replace ts = eei_bin - scalar(low_e) + 1 if !missing(eei_bin)
replace ts = 1 if missing(ts) // rows without eei_bin: tsset needs a non-missing index

by ts, sort: gen panel_e = _n // running row number within each bin
tsset panel_e ts
tsfill // inserts the ts values missing inside each panel_e series
tab ts

	* Fix: use the full variable name added_eei; the former added_e
	* only resolved through variable abbreviation.
replace added_eei = 1 if missing(added_eei)

	* Fix: order pre-existing rows first within each ts, so that a
	* placeholder is only kept (panel_e == 1) when the bin had no row
	* before.
bysort ts (added_eei): replace panel_e = _n
drop if panel_e > 1 & added_eei == 1

	* translate the index back (inverse of the index formula above)
replace eei_bin = ts + scalar(low_e) - 1 if added_eei == 1
tab eei_bin

drop ts // auxiliary index no longer needed

***********************************************
*** complete grid by filling in combinations ****

* Rectangularize: one row at least for every (a_bin, s_bin) pair.
sort a_bin s_bin
fillin a_bin s_bin

* Rows created by fillin carry a missing year and would be lost in a
* later collapse; assign them to 2017.
replace year = 2017 if _fillin == 1 & year == .
tabulate year, missing // author's note: 79 obs still missing year
* Inspect what is left (author's note: all have missing id, no concern)
list id units a_bin s_bin if year == .
replace year = 2017 if year == . // remaining rows without a year


**** BREAK 1 ******
* Write the gridded data set, then close the log and end the do-file.
save spatial_1_raw_s, replace
*******************

log close

clear

exit


/*
** gen e_bin: upper limit in cell is e_max = EEI/100 (kappa + sigma*a) 
		
gen e_max_digit = 0 		
bysort eei_bin a_bin: replace e_max_digit = ((eei_bin+1)/100)*(47*a_bin+51.7)
tab e_max_digit if a_bin == 6 & eei_bin == 58 // test individual cell: should be 196. 
sum kwh if a_bin == 6 & eei_bin == 58

gen e_min_digit = 0 		
bysort eei_bin a_bin: replace e_min_digit = ((eei_bin)/100)*(47*a_bin+51.7)
tab e_min_digit if a_bin == 6 & eei_bin == 58 // test individual cell: should be 193.
 
gen e_bin = 0 		
replace e_bin = floor(e_max) // rounding  
label variable e_bin "e(max exact)" 
label variable e_bin "e(max integer)" */ 
